package edu.ncsu.contractminer.event.ner;

import edu.stanford.nlp.ie.crf.*;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
import edu.stanford.nlp.util.Triple;

import java.util.List;
import java.io.IOException;

/**
 * This is a demo of calling CRFClassifier programmatically.
 * <p>
 * Usage:
 * <code> java -cp "stanford-ner.jar:." edu.ncsu.contractminer.event.ner.NERDemo [serializedClassifier [fileName]]</code>
 * <p>
 * If arguments aren't specified, they default to
 * model/all.3class.distsim.crf.ser.gz and some hardcoded sample text.
 * <p>
 * To use CRFClassifier from the command line:
 * <code>java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
 * [classifier] -textFile [file]</code>
 * <p>
 * Or, if the file is already tokenized with one word per line, perhaps in a
 * tab-separated value format with extra columns for part-of-speech tag, etc.,
 * use the version below (note the 's' instead of the 'x'):
 * <code>java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
 * [classifier] -testFile [file]</code>
 * 
 * @author Jenny Finkel
 * @author Christopher Manning
 */

public class NERDemo {

	/** Classifier model loaded when no path is given on the command line. */
	private static final String DEFAULT_CLASSIFIER = "model/all.3class.distsim.crf.ser.gz";

	/** Entity label whose matched text is echoed for the built-in sample. */
	private static final String ORGANIZATION_LABEL = "ORGANIZATION";

	/**
	 * Loads a serialized CRF classifier and runs it either on a file or on a
	 * hardcoded sample sentence, printing the results to standard output.
	 *
	 * @param args
	 *            optional arguments: {@code args[0]} is the path of the
	 *            serialized classifier (defaults to
	 *            {@code model/all.3class.distsim.crf.ser.gz}); {@code args[1]}
	 *            is the file to annotate (when absent, the hardcoded sample
	 *            text is used)
	 * @throws IOException
	 *             if reading the input file fails (presumably raised by
	 *             {@code IOUtils.slurpFile})
	 */
	public static void main(String[] args) throws IOException {

		String serializedClassifier = args.length > 0 ? args[0] : DEFAULT_CLASSIFIER;

		// NOTE(review): declared as a raw type, as in the original demo. If
		// the bundled Stanford NER version makes this class generic, the typed
		// assignments in the helpers below are unchecked conversions — confirm
		// the library version before parameterizing.
		AbstractSequenceClassifier classifier = CRFClassifier
				.getClassifierNoExceptions(serializedClassifier);

		if (args.length > 1) {
			annotateFile(classifier, args[1]);
		} else {
			annotateSample(classifier);
		}
	}

	/**
	 * Annotates a file in two ways, for teaching purposes: first by reading
	 * the file into a String and classifying that String, then by handing the
	 * file name to the classifier directly. Both results are printed.
	 */
	private static void annotateFile(AbstractSequenceClassifier classifier,
			String fileName) throws IOException {
		String fileContents = IOUtils.slurpFile(fileName);
		List<List<CoreLabel>> fromString = classifier.classify(fileContents);
		printTagged(fromString);

		List<List<CoreLabel>> fromFile = classifier.classifyFile(fileName);
		printTagged(fromFile);
	}

	/**
	 * Classifies the hardcoded sample sentence, prints the classifier's
	 * string output and the list of character-offset triples, and then prints
	 * the text of every span labelled ORGANIZATION.
	 */
	private static void annotateSample(AbstractSequenceClassifier classifier) {
		String text = "Hello Xibin Gao from Microsoft";

		System.out.println(classifier.classifyToString(text));

		List<Triple<String, Integer, Integer>> spans = classifier
				.classifyToCharacterOffsets(text);
		System.out.println(spans);

		for (Triple<String, Integer, Integer> span : spans) {
			// Constant-first comparison so a null label is skipped rather
			// than raising a NullPointerException.
			if (ORGANIZATION_LABEL.equals(span.first())) {
				// second/third are used as begin/end indexes into the text.
				System.out.println(text.substring(span.second, span.third));
			}
		}

		// Alternative output formats, left disabled as in the original demo:
		// System.out.println(classifier.classifyWithInlineXML(text));
		// System.out.println(classifier.classifyToString(text, "xml", true));
	}

	/**
	 * Prints one line per sentence, each token written as
	 * {@code word/ANSWER} followed by a space.
	 */
	private static void printTagged(List<List<CoreLabel>> sentences) {
		for (List<CoreLabel> sentence : sentences) {
			for (CoreLabel word : sentence) {
				System.out.print(word.word() + '/'
						+ word.get(AnswerAnnotation.class) + ' ');
			}
			System.out.println();
		}
	}

}
